R Markdown
if (!require("knitr")) install.packages("knitr")
if (!require("SummarizedExperiment")) install.packages("SummarizedExperiment")
if (!require("edgeR")) install.packages("edgeR")
if (!require("tidyr")) install.packages("tidyr")
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("scales")) install.packages("scales")
if (!require("plotly")) install.packages("plotly")
source("functions.R")
# VARIABLES
data_samples <- params$data_samples
data_counts <- params$data_counts
data_annotation <- params$data_annotation
method <- params$method
excluded_samples <- params$excluded_samples
design_value <- params$design_value
matrix_value <- params$matrix_value
alpha <- params$alpha
method
## [1] "voom"
excluded_samples
## character(0)
design_value
## [1] "~0 + group"
matrix_value
## [1] "cALCL - CD4"
alpha
## [1] 0.01
#READ DATA INTO VARIABLE
#data_samples <- read.csv("~/Desktop/final_310/meta2.csv", row.names=1, header = TRUE, sep = ",")
#data_counts <- read.csv("~/Desktop/final_310/all_samples.fragments_per_gene.tsv", row.names=1, header = TRUE, sep = "\t")
#data_counts <- data_counts[!grepl('^__', rownames(data_counts)),]
#data_annotation <- read.csv("~/Desktop/final_310/gene_annotation.tsv", row.names=1, header = TRUE, sep = "\t")
#READ FILES INTO SE (function: alignment)
se <- readCountsFromTable(data_counts)
alignmentSummaryPlot(se)
complexityPlot(se)
#READ FILES INTO SE (function: alignment)
se <- addSamplesFromTableToSE(se, data_samples)
se <- addAnnotationsFromTableToSE(se, data_annotation)
#GET DGE
dge <- DGEList(counts = assay(se), samples = colData(se), genes = rowData(se))
row.names(dge$genes) <- row.names(dge$counts)
dge <- dge[ rowSums( abs( dge$counts ) ) > 1, ]
countDistributionLinePlot(dge)
#GET HIGH EXPRESSED FEATURES (function: raw_data)
#? Szymon how to determine which method to use
#p310 uses edger, for edger analysis
#p306 uses else, for edger and voom/limma analysis
if (method == "edger") {
edger <- calcNormFactors( dge, method = "TMM")
counts <- cpm(edger, log = TRUE)
selectedFeatures <- rownames( edger )[ apply( counts, 1, function( v ) sum( v >= 1 ) ) >= 1/4 * ncol( counts ) ]
} else {
selectedFeatures <- ( rowSums( cpm( dge ) > 10 ) >= 3 )
}
#GET HIGH EXPRESSED FEATURES (function: raw_data)
highExprDge <- dge[ selectedFeatures,, keep.lib.sizes = FALSE ]
# NORMALISE DATA (function: norm_data)
normDge <- calcNormFactors( highExprDge, method = "TMM")
# FILTER SAMPLES IF NECESSARY
if (!is.null(excluded_samples)) {
normDge$counts <- normDge$counts[,!colnames(normDge$counts) %in% excluded_samples]
data_samples <- data_samples[!rownames(data_samples) %in% excluded_samples, ]
data_samples <- droplevels(data_samples)
se <- addSamplesFromTableToSE(se, data_samples)
tempDge <- DGEList(counts = normDge$counts, samples = colData(se), genes = normDge$genes)
tempDge <- calcNormFactors( tempDge, method = "TMM")
normDge <- tempDge
}
tempDge <- normDge
tempDge$counts <- 2^(cpm(normDge, log = TRUE))
countDistributionLinePlot(tempDge)
variancePcaPlot(tempDge)
# CREATE DESIGN
design <- model.matrix(eval(parse(text=design_value)), normDge$samples)
design
## groupcALCL groupCD4
## KT_1 1 0
## KT_2 1 0
## KT_3 1 0
## KT_4 1 0
## KT_5 1 0
## KT_6 1 0
## KT_7 1 0
## KT_8 1 0
## KT_9 1 0
## KT_10 1 0
## KT_11 1 0
## KT_12 1 0
## ERR431582 0 1
## ERR431598 0 1
## ERR431614 0 1
## ERR431620 0 1
## ERR431626 0 1
## ERR431575 0 1
## ERR431576 0 1
## ERR431588 0 1
## ERR431603 0 1
## ERR431616 0 1
## ERR431571 0 1
## ERR431574 0 1
## ERR431580 0 1
## ERR431581 0 1
## ERR431610 0 1
## ERR431595 0 1
## ERR431597 0 1
## ERR431604 0 1
## ERR431618 0 1
## ERR431625 0 1
## ERR431570 0 1
## ERR431577 0 1
## ERR431584 0 1
## ERR431587 0 1
## ERR431622 0 1
## ERR431566 0 1
## ERR431579 0 1
## ERR431600 0 1
## ERR431615 0 1
## ERR431628 0 1
## ERR431583 0 1
## ERR431589 0 1
## ERR431601 0 1
## ERR431606 0 1
## ERR431609 0 1
## attr(,"assign")
## [1] 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
#moet dit in een specifieke volgorde?: (~0 + dex + celltype - VS - ~0 + celltype + dex)
for (design_column in colnames(design)) {
for (sample_column in colnames(normDge$samples)) {
if (grepl(sample_column, design_column)) {
colnames(design) <- sub(sample_column, "", colnames(design))
}
}
}
matrix_value <- strsplit(matrix_value, ",")[[1]]
contr.matrix <- makeContrasts(contrasts = matrix_value, levels=design)
contr.matrix
## Contrasts
## Levels cALCL - CD4
## cALCL 1
## CD4 -1
# FIT
if (method == "edger") {
#normDge <- estimateGLMCommonDisp(normDge, design)
#normDge <- estimateGLMTrendedDisp(normDge, design)
#normDge <- estimateGLMTagwiseDisp(normDge, design)
normDge <- estimateDisp(normDge, design)
vfit <- glmFit(normDge, design)
efit <- glmLRT(vfit, contrast = contr.matrix)
} else {
voom <- voom(normDge, design)
vfit <- lmFit(voom)
vfit <- contrasts.fit(vfit, contrasts = contr.matrix)
efit <- eBayes(vfit)
}
efit$DE <- decideTests(efit, p.value = alpha)
summary(efit$DE)
## cALCL - CD4
## Down 6564
## NotSig 3459
## Up 5777
# CREATE deTab TABLE
if (method == "edger") {
deTab <- topTags(efit, n=Inf)$table
deTab <- deTab[order(rownames(deTab)),]
deTab$DE <- efit$DE
deTab$avgLog2CPM <- efit$table$logCPM
deTab$avgLog2FC <- efit$table$logFC
deTab$adj.P.Val <- p.adjust(deTab$PValue, method = "BH")
colnames(deTab)[colnames(deTab)=="PValue"] <- "P.Value"
} else {
deTab <- data.frame(topTable(efit, coef=1, n=Inf))
deTab <- deTab[order(rownames(deTab)),]
deTab$DE <- efit$DE
colnames(deTab)[colnames(deTab)=="logFC"] <- "avgLog2FC"
colnames(deTab)[colnames(deTab)=="AveExpr"] <- "avgLog2CPM"
}
maPlot(deTab)
pValuePlot(deTab)
allGenes <- deTab
allGenes <- allGenes[order(rank(allGenes$adj.P.Val)),]
DEG <- allGenes[allGenes$adj.P.Val < alpha,]
DEG <- DEG[which(DEG$avgLog2FC < -2 | DEG$avgLog2FC > 2),]
up <- DEG[which(DEG$DE == 1),]
down <- DEG[which(DEG$DE == -1),]
deTab <- deTab[which(deTab$avgLog2FC < -2 | deTab$avgLog2FC > 2),]
# ENRICHMENT ANALYSIS
save(efit, file="efit.RData")
# SAVE ANALYSIS
normDge$counts <- 2^(cpm(normDge, log = TRUE))
save(deTab, normDge, allGenes, DEG, file="analysis.RData")
#INFO
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=nl_NL.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=nl_NL.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=nl_NL.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=nl_NL.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] plotly_4.9.0 scales_1.0.0
## [3] ggplot2_3.2.1 tidyr_1.0.0
## [5] SummarizedExperiment_1.14.1 DelayedArray_0.10.0
## [7] BiocParallel_1.18.1 matrixStats_0.55.0
## [9] Biobase_2.44.0 GenomicRanges_1.36.1
## [11] GenomeInfoDb_1.20.0 IRanges_2.18.3
## [13] S4Vectors_0.22.1 BiocGenerics_0.30.0
## [15] knitr_1.25 shinyjs_1.0
## [17] shinyWidgets_0.4.9 shinydashboard_0.7.1
## [19] shiny_1.4.0.9000 edgeR_3.26.8
## [21] limma_3.40.6
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.1 splines_3.6.1 viridisLite_0.3.0
## [4] bit64_0.9-7 jsonlite_1.6 assertthat_0.2.1
## [7] blob_1.2.0 GenomeInfoDbData_1.2.1 yaml_2.2.0
## [10] pillar_1.4.2 RSQLite_2.1.2 backports_1.1.5
## [13] lattice_0.20-38 glue_1.3.1 digest_0.6.22
## [16] RColorBrewer_1.1-2 promises_1.1.0 XVector_0.24.0
## [19] colorspace_1.4-1 htmltools_0.4.0 httpuv_1.5.2
## [22] Matrix_1.2-17 pkgconfig_2.0.3 zlibbioc_1.30.0
## [25] purrr_0.3.3 xtable_1.8-4 GO.db_3.8.2
## [28] later_1.0.0 tibble_2.1.3 DT_0.9
## [31] withr_2.1.2 lazyeval_0.2.2 magrittr_1.5
## [34] crayon_1.3.4 mime_0.7 memoise_1.1.0
## [37] evaluate_0.14 data.table_1.12.6 rsconnect_0.8.15
## [40] tools_3.6.1 org.Hs.eg.db_3.8.2 lifecycle_0.1.0
## [43] stringr_1.4.0 munsell_0.5.0 locfit_1.5-9.1
## [46] AnnotationDbi_1.46.1 packrat_0.5.0 compiler_3.6.1
## [49] rlang_0.4.1 grid_3.6.1 RCurl_1.95-4.12
## [52] rstudioapi_0.10 htmlwidgets_1.5.1 crosstalk_1.0.0
## [55] bitops_1.0-6 rmarkdown_1.16 gtable_0.3.0
## [58] DBI_1.0.0 R6_2.4.0 dplyr_0.8.3
## [61] fastmap_1.0.1 bit_1.1-14 zeallot_0.1.0
## [64] stringi_1.4.3 Rcpp_1.0.2 vctrs_0.2.0
## [67] tidyselect_0.2.5 xfun_0.10